package at.ac.tuwien.isis.infret;


/**
 * Normalizes the raw text of a single document.
 *
 * <p>The instance holds the document text; {@link #doPreprocessing()} rewrites it
 * in place and {@link #getDocumentContent()} returns the current state.
 */
public class Preprocessor
{
	/** Current document text; replaced by {@link #doPreprocessing()}. */
	private String documentContent;

	/**
	 * Creates a preprocessor for the given text.
	 *
	 * @param documentContent the raw document text; stored as-is without validation
	 */
	public Preprocessor(String documentContent)
	{
		this.documentContent = documentContent;
	}

	/**
	 * Normalizes the stored document text in place.
	 *
	 * <p>Every run of characters outside {@code a-z}, {@code A-Z} and {@code 0-9}
	 * (punctuation, backslashes, whitespace, non-ASCII characters) is replaced by a
	 * single space, and the result is lower-cased. Leading and trailing separators
	 * are not trimmed, so the result may start or end with one space.
	 *
	 * @throws NullPointerException if the stored document text is {@code null}
	 */
	public void doPreprocessing()
	{
		// Replace each run of non-alphanumeric characters with one space. This covers
		// punctuation removal and the collapsing of surplus whitespace in a single pass.
		String normalized = this.documentContent.replaceAll("[^a-zA-Z0-9]+", " ");

		// Lower-case independently of the JVM default locale; with a Turkish default
		// locale a plain toLowerCase() would map 'I' to dotless 'ı' instead of 'i'.
		this.documentContent = normalized.toLowerCase(java.util.Locale.ROOT);
	}

	/**
	 * Currently does nothing; the stored document text is left unchanged.
	 */
	public void removeWhitespaces()
	{
		// Intentionally empty: no behavior is implemented here.
	}

	/**
	 * Returns the current document text.
	 *
	 * @return the text as passed to the constructor, or its normalized form once
	 *         {@link #doPreprocessing()} has been called
	 */
	public String getDocumentContent()
	{
		return this.documentContent;
	}
}
